library(tidyverse)
library(ggridges)
library(lubridate)
library(scales)
library(corrplot)
library(factoextra)
library(roll)
library(RolWinMulCor)
library(BINCOR)
library(corrr)
dat = read_csv("../Data/final_combined_dates_filled_v1.csv")
dat$sampledate = as.Date(paste0(dat$year-1, "-12-31")) + dat$daynum_fill
dat %>%
mutate(lakeid = factor(lakeid, levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"))) %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
lakes_order = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI")
vars_order = c("iceoff", "straton", "chlor_spring", "secchi_openwater", "daphnia_biomass", "doc", "chlor_all", "anoxia_summer", "stability", "energy", "chlor_fall", "stratoff", "iceon")
dat %>%
filter(metric %in% vars_order) %>%
mutate(lakeid = factor(lakeid, levels = lakes_order),
metric = factor(metric, levels = rev(vars_order), labels = rev(c("iceoff", "straton", "spring bloom", "clearwater", "daphnia", "doc", "peak chl", "anoxia summer", "stability", "energy", "fall bloom", "stratoff", "iceon")))) %>%
ggplot() +
stat_density_ridges(aes(x = as.Date(daynum, origin = as.Date('2019-01-01')),
y= metric, col = metric, fill = metric),
alpha = 0.5, quantile_lines = T, quantiles = 2) +
scale_x_date(labels = date_format("%b")) +
facet_wrap(~ (lakeid)) +
xlab('') + ylab('Density')+
theme_minimal() +
theme(axis.text=element_text(size=8))
source("../src/timing_matrix_construction.R")
vars_order_eigen = c("iceoff", "straton", "secchi_openwater", "daphnia_biomass", "doc", "chlor_spring", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
all_var_combos = expand.grid(vars_order_eigen, vars_order_eigen)
all_lake_years = dat %>%
filter(metric %in% vars_order_eigen) %>%
select(lakeid, year) %>%
distinct() %>%
rename(id = lakeid)
all_lyv = expand_grid(all_var_combos, all_lake_years)
all_lyv_1 = dat %>%
filter(metric %in% vars_order_eigen) %>%
select(lakeid, year, metric, daynum_fill) %>%
rename(Var1 = metric, id=lakeid, value=daynum_fill) %>%
right_join(all_lyv) %>%
rename(Value1 = value)
all_lyv_2 = dat %>%
filter(metric %in% vars_order) %>%
select(lakeid, year, metric, daynum_fill) %>%
rename(Var2 = metric, id=lakeid, value=daynum_fill) %>%
right_join(all_lyv_1) %>%
rename(Value2 = value) %>%
select(id, year, Var1, Value1, Var2, Value2) %>%
mutate(doy_diff = Value2 - Value1)
lake_years = dat %>%
select(lakeid, year) %>%
distinct()
lake_years$eigen = as.numeric(NA)
lakes = unique(lake_years$lakeid)
for(l in lakes){
hold_mats = matrix_single_lake(timings.df = all_lyv_2, single_lake = l)
for(i in 1:length(dimnames(hold_mats)[[3]])){
cur_year = dimnames(hold_mats)[[3]][i]
cur_ind = lake_years$lakeid == l & lake_years$year == as.numeric(cur_year)
x = abs(hold_mats[,,i])
error <- try(eigen(x), silent = T)
if (class(error) != "try-error"){
lake_years[cur_ind, "eigen"] = max(error$values)
}
}
}
ggplot(lake_years %>% arrange(year)) +
geom_point(aes(year, eigen, col=lakeid, group=lakeid)) +
geom_line(aes(year, eigen, col=lakeid, group=lakeid)) +
scale_color_brewer(palette = "Paired") +
theme_bw()
eigen_corrs = lake_years %>%
pivot_wider(names_from = lakeid, values_from = eigen) %>%
select(-year) %>%
cor(use="complete.obs")
# diag(eigen_corrs) = 0
eigen_corrs %>%
corrplot(method="circle")
Setup select lakes and variables to analyze:
var_pca = c("iceoff", "straton", "secchi_openwater", "dapnia_biomass", "doc", "chlor_spring", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
lakeid_pca = c("FI", "ME", "MO", "WI", "AL", "BM", "CB", "CR", "SP", "TB", "TR")
dat_pca = dat %>%
filter(metric %in% var_pca & lakeid %in% lakeid_pca) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = "metric", values_from="daynum_fill")
Do the PCA, plot results:
pca = prcomp(dat_pca[, 3:ncol(dat_pca)], scale=T, center=T)
fviz_eig(pca)
fviz_pca_var(pca,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
var = get_pca_var(pca)
inds = get_pca_ind(pca)
pca_res = dat_pca[, c("lakeid", "year")]
pca_res$PC1 = pca$x[,1]
pca_res$PC2 = pca$x[,2]
pca_res$PC3 = pca$x[,3]
pca_res$PC4 = pca$x[,4]
pca_res_cor = pca_res %>%
pivot_longer(cols = c("PC1", "PC2", "PC3", "PC4")) %>%
pivot_wider(names_from = "lakeid", values_from="value") %>%
arrange(name, year)
pca_matrix_p1 = pca_res_cor %>%
filter(name == "PC1") %>%
select(-year) %>%
summarise(as.data.frame(cor(.[,lakeid_pca], use="complete.obs"))) %>%
as.matrix()
rownames(pca_matrix_p1) = lakeid_pca
# diag(pca_matrix) = 0
pca_matrix_p2 = pca_res_cor %>%
filter(name == "PC2") %>%
select(-year) %>%
summarise(as.data.frame(cor(.[,lakeid_pca], use="complete.obs"))) %>%
as.matrix()
rownames(pca_matrix_p2) = lakeid_pca
# diag(pca_matrix) = 0
# dev.off()
par(mfrow=c(1,2))
corrplot(pca_matrix_p1, method="circle", title="PC1", mar=c(0,0,1,0))
corrplot(pca_matrix_p2, method="circle", title = "PC2", mar=c(0,0,1,0))
var_pca_noice = c("straton", "secchi_openwater", "daphnia_biomass", "doc", "chlor_spring", "anoxia_summer", "stability", "energy", "stratoff")
lakeid_pca = c("FI", "ME", "MO", "WI", "AL", "BM", "CB", "CR", "SP", "TB", "TR")
dat_pca_noice = dat %>%
filter(metric %in% var_pca_noice & lakeid %in% lakeid_pca) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = "metric", values_from="daynum_fill")
pca_noice = prcomp(dat_pca_noice[, 3:ncol(dat_pca_noice)], scale=T, center=T)
fviz_eig(pca_noice)
fviz_pca_var(pca_noice,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
var = get_pca_var(pca_noice)
inds = get_pca_ind(pca_noice)
pca_res_noice = dat_pca_noice[, c("lakeid", "year")]
pca_res_noice$PC1 = pca_noice$x[,1]
pca_res_noice$PC2 = pca_noice$x[,2]
pca_res_noice$PC3 = pca_noice$x[,3]
pca_res_noice$PC4 = pca_noice$x[,4]
pca_res_cor_noice = pca_res_noice %>%
pivot_longer(cols = c("PC1", "PC2", "PC3", "PC4")) %>%
pivot_wider(names_from = "lakeid", values_from="value") %>%
arrange(name, year)
pca_matrix_p1_noice = pca_res_cor_noice %>%
filter(name == "PC1") %>%
select(-year) %>%
summarise(as.data.frame(cor(.[,lakeid_pca], use="complete.obs"))) %>%
as.matrix()
rownames(pca_matrix_p1_noice) = lakeid_pca
# diag(pca_matrix) = 0
pca_matrix_p2_noice = pca_res_cor_noice %>%
filter(name == "PC2") %>%
select(-year) %>%
summarise(as.data.frame(cor(.[,lakeid_pca], use="complete.obs"))) %>%
as.matrix()
rownames(pca_matrix_p2_noice) = lakeid_pca
par(mfrow=c(1,2))
corrplot(pca_matrix_p1_noice, method="circle", title="PC1", mar=c(0,0,1,0))
corrplot(pca_matrix_p2_noice, method="circle", title = "PC2", mar=c(0,0,1,0))
dat_pca_N = dat %>%
filter(metric %in% var_pca & lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR")) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = "metric", values_from="daynum_fill")
dat_pca_S = dat %>%
filter(metric %in% var_pca & lakeid %in% c("FI", "ME", "MO", "WI")) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = "metric", values_from="daynum_fill")
pca_N = prcomp(dat_pca_N[, 3:ncol(dat_pca_N)], scale=T, center=T)
pca_S = prcomp(dat_pca_S[, 3:ncol(dat_pca_S)], scale=T, center=T)
# fviz_eig(pca_noice)
pca_plot_N = fviz_pca_var(pca_N,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE,
title ="N lakes") # Avoid text overlapping
pca_plot_S = fviz_pca_var(pca_S,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE,
title ="S lakes")
gridExtra::grid.arrange(pca_plot_N, pca_plot_S, nrow=1)
cor_vars = vars_order
dat %>%
filter(metric %in% cor_vars) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = metric, values_from = daynum_fill) %>%
select(-lakeid, -year) %>%
correlate() %>%
network_plot(min_cor = 0.2)
cor_vars = vars_order
# "AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"
pN = dat %>%
filter(metric %in% cor_vars & lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR")) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = metric, values_from = daynum_fill) %>%
select(-lakeid, -year) %>%
correlate() %>%
network_plot(min_cor = 0.2, colours = c("red", "white", "blue")) +
ggtitle("Northern Lakes")
pS = dat %>%
filter(metric %in% cor_vars & lakeid %in% c("FI", "ME", "MO", "WI")) %>%
select(lakeid, year, metric, daynum_fill) %>%
pivot_wider(names_from = metric, values_from = daynum_fill) %>%
select(-lakeid, -year) %>%
correlate() %>%
network_plot(min_cor = 0.2, colours = c("red", "white", "blue")) +
ggtitle("Southern Lakes")
gridExtra::grid.arrange(pN, pS, nrow=1)
(I’m missing code for the rolling window correlation analysis Kait did)